home *** CD-ROM | disk | FTP | other *** search
- (*===========================================================================*)
- (* Match a string *)
- (* *)
- (* Copyright 1988, 1989, 1990 by H. Roy Engehausen. All rights reserved. *)
- (* *)
- (* The purpose of this function is to match two strings where one of them *)
- (* can contain pattern information rather than a straight string to *)
- (* string comparison. *)
- (* *)
- (* Special characters in the pattern are: *)
- (* *)
- (* * -- Matches 0 or more characters of any type *)
- (* @ -- a..z, A..Z *)
- (* # -- 0..9 *)
- (* + -- A..z, A..Z, 0..9 *)
- (* ? -- A-F, a-f, 0-9 (a hex number) *)
- (* < -- a..z *)
- (* > -- A..Z *)
- (* $ -- =@#+?<>$ *)
- (* = -- Any one character *)
- (* " -- Escape. The next character in the pattern much match exactly.*)
- (* wild cards will not be interpreted. *)
- (* ( -- Group. A "(" starts a group and a ")" ends it. For each *)
- (* group, there must be a matching character in the string. *)
- (* Example: (abc) matches b. Wild cards are not interpreted. *)
- (* \ -- The string to the right of the \ is optional. It can be *)
- (* present and if so must match. If it is missing, it is *)
- (* a match. Example: a\a will match with a or aa but not ab *)
- (* *)
- (*===========================================================================*)
-
- {$V-} (* All checks off *)
- {$R-}
- {$F-}
- {$O+} (* OK for overlay *)
-
- UNIT match;
-
- INTERFACE
-
- FUNCTION match_str(in_str : STRING; pattern : STRING) : BOOLEAN;
-
- (*===========================================================================*)
- (* Global variables and types *)
- (*===========================================================================*)
-
- TYPE
- match_table = ARRAY[0..255] OF CHAR;
-
- match_branch_value = (br_at,
- br_dollar,
- br_dq,
- br_equal,
- br_gt,
- br_lparen,
- br_lt,
- br_plus,
- br_pound,
- br_question,
- br_star,
- br_bslash,
- br_other);
-
- match_branch_array = ARRAY[0..255] OF match_branch_value;
-
- VAR
- match_strtab : match_table;
- match_branch_table : match_branch_array;
-
- IMPLEMENTATION
-
-
-
-
- (*===========================================================================*)
- (* *)
- (*===========================================================================*)
-
- FUNCTION match_str(in_str : STRING; pattern : STRING) : BOOLEAN;
-
- VAR
- grptab : match_table;
-
- (*=========================================================================*)
- (* Subfunction for substring match *)
- (*=========================================================================*)
-
- {$S-} (* All checks off *)
-
- FUNCTION match_subs(ip :BYTE; pp : BYTE) : BOOLEAN;
-
- VAR
- b : BOOLEAN;
- cp : CHAR;
- cs : CHAR;
- t : BYTE;
-
- LABEL iterate;
-
- BEGIN;
-
- {$IFDEF prt}
- WRITELN('.', in_str, '/' , pattern);
- {$ENDIF}
-
- (*---------------------------------------------------------------------*)
- (* Set up things *)
- (*---------------------------------------------------------------------*)
-
- match_subs := FALSE;
-
- (*---------------------------------------------------------------------*)
- (* Loop until we run out of things *)
- (*---------------------------------------------------------------------*)
-
- WHILE (ip <= LENGTH(in_str)) AND (pp <= LENGTH(pattern)) DO
- BEGIN;
-
- (*-----------------------------------------------------------------*)
- (* Get the characters to check *)
- (*-----------------------------------------------------------------*)
-
- cs := in_str[ip];
- cp := pattern[pp];
- t := ORD(match_strtab[ORD(cs)]);
-
- (*-----------------------------------------------------------------*)
- (* Find out pattern type *)
- (*-----------------------------------------------------------------*)
-
- CASE match_branch_table[ORD(cp)] OF
-
- (*---------------------------------------------------------------*)
- (* Must match exactly! *)
- (*---------------------------------------------------------------*)
-
- br_other :
- BEGIN;
-
- {$IFDEF prt}
- WRITELN('CHAR.' , in_c, p_c, '..');
- {$ENDIF}
-
- IF cs <> cp THEN EXIT;
- GOTO iterate;
-
- END;
-
- (*---------------------------------------------------------------*)
- (* Wild card? *)
- (*---------------------------------------------------------------*)
-
- br_star :
- BEGIN;
-
- IF pp = LENGTH(pattern) THEN
- BEGIN;
- match_subs := TRUE;
- EXIT;
- END;
-
- b := match_subs(ip, pp + 1);
-
- IF b THEN
- BEGIN;
- match_subs := TRUE;
- EXIT;
- END;
-
- match_subs := match_subs(ip + 1, pp);
-
- EXIT;
-
- END;
-
- (*---------------------------------------------------------------*)
- (* Backslash (\) *)
- (*---------------------------------------------------------------*)
-
- br_bslash :
- BEGIN;
- IF pp = LENGTH(pattern) THEN
- match_subs := FALSE
- ELSE
- BEGIN;
- b := match_subs(ip, pp + 1);
- match_subs := b;
- END;
- EXIT;
- END;
-
- (*---------------------------------------------------------------*)
- (* Any one character *)
- (*---------------------------------------------------------------*)
-
- br_equal : GOTO iterate;
-
- (*---------------------------------------------------------------*)
- (* Group *)
- (*---------------------------------------------------------------*)
-
- br_lparen:
- BEGIN;
-
- FILLCHAR(grptab, SIZEOF(grptab), CHR(0));
- INC(pp);
-
- WHILE (pp <= LENGTH(pattern)) AND (pattern[pp] <> ')') DO
- BEGIN;
- grptab[ORD(pattern[pp])] := 'X';
- INC(pp);
- END;
-
- IF (pp > LENGTH(pattern))
- OR (grptab[ORD(in_str[ip])] = CHR(0)) THEN
- EXIT;
-
- GOTO iterate;
-
- END;
-
- (*---------------------------------------------------------------*)
- (* Escape *)
- (*---------------------------------------------------------------*)
-
- br_dq:
- BEGIN;
-
- IF pp < LENGTH(pattern) THEN
- BEGIN;
- INC(pp);
- cp := pattern[pp];
- END;
-
- IF cs <> cp THEN EXIT;
- GOTO iterate;
-
- END;
-
- (*---------------------------------------------------------------*)
- (* 0-9 *)
- (*---------------------------------------------------------------*)
-
- br_pound : t := $08 AND t;
-
- (*---------------------------------------------------------------*)
- (* A-Z, a-z *)
- (*---------------------------------------------------------------*)
-
- br_at : t := $06 AND t;
-
- (*---------------------------------------------------------------*)
- (* A-Z, a-z, 0-9 *)
- (*---------------------------------------------------------------*)
-
- br_plus : t := $0E AND t;
-
- (*---------------------------------------------------------------*)
- (* Hex number (A-F, a-f, 0-9) *)
- (*---------------------------------------------------------------*)
-
- br_question : t := $20 AND t;
-
- (*---------------------------------------------------------------*)
- (* a-z *)
- (*---------------------------------------------------------------*)
-
- br_lt : t := $02 AND t;
-
- (*---------------------------------------------------------------*)
- (* A-Z *)
- (*---------------------------------------------------------------*)
-
- br_gt : t := $04 AND t;
-
- (*---------------------------------------------------------------*)
- (* Special characters (=@#+?<>$) *)
- (*---------------------------------------------------------------*)
-
- br_dollar : t := $01 AND t;
-
- END; (*----- End CASE statment ------------------------------------*)
-
- (*-----------------------------------------------------------------*)
- (* If we fall out here then the variable "t" controls true/false *)
- (*-----------------------------------------------------------------*)
-
- IF t = 0 THEN EXIT;
-
- (*-----------------------------------------------------------------*)
- (* This is the label we use to ITERATE thru the loop *)
- (*-----------------------------------------------------------------*)
-
- iterate:
-
- INC(ip);
- INC(pp);
-
- END; (*----- End loop thru string -----------------------------------*)
-
- (*---------------------------------------------------------------------*)
- (* Check for the special case of a pattern that ends in * or \ *)
- (*---------------------------------------------------------------------*)
-
- IF (ip <= LENGTH(in_str))
- OR ((pp = LENGTH(pattern)) AND (pattern[pp] <> '*'))
- OR ((pp < LENGTH(pattern)) AND (pattern[pp] <> '\')) THEN EXIT;
-
- (*---------------------------------------------------------------------*)
- (* Whoopie.. We have a match *)
- (*---------------------------------------------------------------------*)
-
- match_subs := TRUE;
-
- END; (*----- End substring matcher --------------------------------------*)
-
- (*=========================================================================*)
- (* Main line! *)
- (*=========================================================================*)
-
- BEGIN;
-
- (*-----------------------------------------------------------------------*)
- (* Match it *)
- (*-----------------------------------------------------------------------*)
-
- match_str := match_subs(1, 1)
-
- END;
-
- END.